¿Qué es un tumor cerebral?
¶Un tumor cerebral ocurre cuando se forman células anormales dentro del cerebro. Hay dos tipos principales de tumores: tumores malignos y tumores benignos.
Descripción del conjunto de datos
¶Los datos de imagen que se usaron para este problema son Imágenes de resonancia magnética cerebral para la detección de tumores cerebrales. Consiste en resonancias magnéticas de cuatro clases:
no_tumor glioma_tumor meningioma_tumor pituitary_tumor

# Display an illustrative image of the four MRI classes.
# NOTE(review): hard-coded absolute Windows path — only works on the author's machine.
from IPython.display import Image
ruta_imagen = 'C:/Users/Usuario/Desktop/modelado/Tumores.jpg'
display(Image(filename=ruta_imagen))
Información Relevante sobre el data set
¶Objetivo
¶Crear un sistema automatizado en la nube que pueda facilitar la detección de tumores cerebrales construyendo un modelo de CNN que clasificará si el individuo tiene un tumor o no, con base en una Resonancia Magnética Nuclear (RMN).
¿Cómo podemos lograrlo?
¶La aplicación de técnicas de clasificación automatizadas utilizando Machine Learning(ML) e Inteligencia Artificial(AI) ha mostrado consistentemente precisión más alta que la clasificación manual.
Podemos lograrlo mediante 3 modelos:
1. CNN (Convolutional Neural Network)
2. ANN (Artificial Neural Network)
3. TL (Transfer Learning)
CNN sin efficientNet
¶Librerias Utilizadas
¶import seaborn as sns
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TensorBoard, ModelCheckpoint
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Conv2D
from tensorflow.keras.optimizers import Adam
from keras.models import load_model
from kerastuner import RandomSearch
from sklearn.metrics import classification_report
import os
import torch
import glob
import pathlib
from skimage.io import imread
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")
/var/folders/q4/kct2nyps4zscxdwwyqdj98fc0000gn/T/ipykernel_4814/2327914300.py:18: DeprecationWarning: `import kerastuner` is deprecated, please use `import keras_tuner`. from kerastuner import RandomSearch
Construcción de Modelo con Keras
¶Preparación de los datos
# --- Data preparation --------------------------------------------------------
# All images from BOTH the Training and Testing folders are pooled together and
# re-split 80/20 below, so the dataset's original split is deliberately discarded.
# (The original code initialized test_img/test_labels only to clobber them in
# train_test_split, and appended "test" images to the train lists — cleaned up.)
train_img = []
train_labels = []
path_train = './work/Training/'
path_test = './work/Testing/'
img_size = 300

# Pool every image from both folders; the containing folder name is the label.
for base in (path_train, path_test):
    for label in os.listdir(base):
        class_dir = os.path.join(base, label)
        for fname in os.listdir(class_dir):
            # Resize so every sample has the same (img_size, img_size, 3) shape.
            # NOTE(review): cv2.imread returns None for non-image files — assumes
            # the folders contain only readable images, as the original did.
            img = cv2.imread(os.path.join(class_dir, fname))
            train_img.append(cv2.resize(img, (img_size, img_size)))
            train_labels.append(label)

train_img = np.array(train_img)

# Shuffle the pool, then hold out 20% as the test split.
X_train, y_train = shuffle(train_img, train_labels, random_state=101)
train_img, test_img, train_labels, test_labels = train_test_split(
    X_train, y_train, test_size=0.2, random_state=101)

# Integer-encode the classes: 0=no_tumor, 1=glioma, 2=meningioma, 3=pituitary.
_CLASS_TO_ID = {'no_tumor': 0, 'glioma_tumor': 1, 'meningioma_tumor': 2}
train_labels_encoded = [_CLASS_TO_ID.get(c, 3) for c in train_labels]
test_labels_encoded = [_CLASS_TO_ID.get(c, 3) for c in test_labels]

print("Shape of train: ", (train_img).shape, " and shape of test: ", (test_img).shape)
Shape of train: (2611, 300, 300, 3) and shape of test: (653, 300, 300, 3)
Aumentando los datos
# Data augmentation: random rotations/shifts/zoom/flips enlarge the effective
# training set and reduce overfitting.
img_datagen = ImageDataGenerator(
    rotation_range=30,       # rotate up to +/-30 degrees
    width_shift_range=0.1,   # horizontal shift up to 10% of width
    height_shift_range=0.1,  # vertical shift up to 10% of height
    zoom_range=0.2,
    horizontal_flip=True)
# Fit internal statistics on the (pre-split) training pool.
# NOTE(review): the generator is fitted here but never passed to model.fit
# below, so the augmentation is not actually applied during training — confirm intent.
img_datagen.fit(X_train)
#img_datagen.fit(X_test)
Algunas Visualizaciones
# --- Quick visual sanity checks ----------------------------------------------
# Show the first five training images with their class labels.
# Bug fix: the original iterated X_train (the full pre-split, shuffled pool)
# while labelling with train_labels (the post-split subset, re-shuffled by
# train_test_split), so the displayed labels did not match the images.
plt.figure(figsize=(15, 15))
for idx, sample in enumerate(train_img):
    if idx >= 5:
        break
    plt.subplot(1, 5, idx + 1)
    plt.imshow(sample)
    plt.xlabel(train_labels[idx])
    plt.tight_layout()

# Class-frequency bar plots for the train and test splits.
plt.figure(figsize=(17, 8))
lis = ['Train', 'Test']
a = sns.color_palette("rocket_r")
for idx, split_labels in enumerate([train_labels, test_labels]):
    plt.subplot(1, 2, idx + 1)
    sns.countplot(x=split_labels)
    plt.xlabel(lis[idx])
# --- Plain CNN: four conv/pool blocks + dense head ---------------------------
# 'same' padding preserves spatial size; each MaxPool halves it.
model = tf.keras.Sequential(
    [
        tf.keras.layers.Conv2D(kernel_size=(5, 5), filters=32, activation='relu', padding='same'),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
        tf.keras.layers.Conv2D(kernel_size=(3, 3), filters=32, activation='relu', padding='same'),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
        tf.keras.layers.Conv2D(kernel_size=(3, 3), filters=32, activation='relu', padding='same'),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
        tf.keras.layers.Conv2D(kernel_size=(3, 3), filters=64, activation='relu', padding='same'),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(rate=0.5),  # regularization against overfitting
        # Bug fix: the four classes are mutually exclusive and the loss is
        # categorical_crossentropy, which expects a probability distribution —
        # use softmax instead of per-unit sigmoid.
        tf.keras.layers.Dense(4, activation='softmax')
    ])
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks: TensorBoard logs, best-weights checkpoint, LR decay on plateau.
tensorboard = TensorBoard(log_dir='logs')
# NOTE(review): the checkpoint file is named "effnet.h5" although this is the
# plain CNN — kept unchanged so existing paths still work, but consider renaming.
checkpoint = ModelCheckpoint("effnet.h5", monitor="val_accuracy",
                             save_best_only=True, mode="auto", verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.3, patience=2,
                              min_delta=0.001, mode='auto', verbose=1)
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`. WARNING:absl:There is a known slowdown when using v2.11+ Keras optimizers on M1/M2 Macs. Falling back to the legacy Keras optimizer, i.e., `tf.keras.optimizers.legacy.Adam`.
# Train for up to 20 epochs with a 25% validation split. Labels are one-hot
# encoded via get_dummies (columns in alphabetical class order); images are
# cast uint8 -> float32, as the ImageDataGenerator note above requires.
history = model.fit(tf.cast(train_img, tf.float32), np.array(pd.get_dummies(train_labels)),
                    validation_split=0.25, epochs=20, verbose=1, batch_size=32,
                    callbacks=[checkpoint, reduce_lr, tensorboard])
Epoch 1/20 62/62 [==============================] - ETA: 0s - loss: 4.8601 - accuracy: 0.4259 Epoch 1: val_accuracy improved from -inf to 0.57734, saving model to effnet.h5 62/62 [==============================] - 74s 1s/step - loss: 4.8601 - accuracy: 0.4259 - val_loss: 0.9993 - val_accuracy: 0.5773 - lr: 0.0010 Epoch 2/20 62/62 [==============================] - ETA: 0s - loss: 0.9438 - accuracy: 0.6093 Epoch 2: val_accuracy improved from 0.57734 to 0.66003, saving model to effnet.h5 62/62 [==============================] - 79s 1s/step - loss: 0.9438 - accuracy: 0.6093 - val_loss: 0.8197 - val_accuracy: 0.6600 - lr: 0.0010 Epoch 3/20 62/62 [==============================] - ETA: 0s - loss: 0.7524 - accuracy: 0.6874 Epoch 3: val_accuracy improved from 0.66003 to 0.69525, saving model to effnet.h5 62/62 [==============================] - 79s 1s/step - loss: 0.7524 - accuracy: 0.6874 - val_loss: 0.7429 - val_accuracy: 0.6953 - lr: 0.0010 Epoch 4/20 62/62 [==============================] - ETA: 0s - loss: 0.5897 - accuracy: 0.7610 Epoch 4: val_accuracy improved from 0.69525 to 0.76876, saving model to effnet.h5 62/62 [==============================] - 74s 1s/step - loss: 0.5897 - accuracy: 0.7610 - val_loss: 0.5923 - val_accuracy: 0.7688 - lr: 0.0010 Epoch 5/20 62/62 [==============================] - ETA: 0s - loss: 0.4721 - accuracy: 0.8126 Epoch 5: val_accuracy improved from 0.76876 to 0.83614, saving model to effnet.h5 62/62 [==============================] - 75s 1s/step - loss: 0.4721 - accuracy: 0.8126 - val_loss: 0.4837 - val_accuracy: 0.8361 - lr: 0.0010 Epoch 6/20 62/62 [==============================] - ETA: 0s - loss: 0.3694 - accuracy: 0.8621 Epoch 6: val_accuracy improved from 0.83614 to 0.84992, saving model to effnet.h5 62/62 [==============================] - 78s 1s/step - loss: 0.3694 - accuracy: 0.8621 - val_loss: 0.4518 - val_accuracy: 0.8499 - lr: 0.0010 Epoch 7/20 62/62 [==============================] - ETA: 0s - loss: 0.3286 - accuracy: 0.8769 
Epoch 7: val_accuracy did not improve from 0.84992 62/62 [==============================] - 82s 1s/step - loss: 0.3286 - accuracy: 0.8769 - val_loss: 0.4939 - val_accuracy: 0.8484 - lr: 0.0010 Epoch 8/20 62/62 [==============================] - ETA: 0s - loss: 0.3011 - accuracy: 0.8784 Epoch 8: val_accuracy improved from 0.84992 to 0.86677, saving model to effnet.h5 62/62 [==============================] - 94s 2s/step - loss: 0.3011 - accuracy: 0.8784 - val_loss: 0.4067 - val_accuracy: 0.8668 - lr: 0.0010 Epoch 9/20 62/62 [==============================] - ETA: 0s - loss: 0.2614 - accuracy: 0.8968 Epoch 9: val_accuracy improved from 0.86677 to 0.87749, saving model to effnet.h5 62/62 [==============================] - 106s 2s/step - loss: 0.2614 - accuracy: 0.8968 - val_loss: 0.4537 - val_accuracy: 0.8775 - lr: 0.0010 Epoch 10/20 62/62 [==============================] - ETA: 0s - loss: 0.2500 - accuracy: 0.9081 Epoch 10: val_accuracy improved from 0.87749 to 0.88361, saving model to effnet.h5 62/62 [==============================] - 113s 2s/step - loss: 0.2500 - accuracy: 0.9081 - val_loss: 0.4417 - val_accuracy: 0.8836 - lr: 0.0010 Epoch 11/20 62/62 [==============================] - ETA: 0s - loss: 0.2354 - accuracy: 0.9050 Epoch 11: val_accuracy improved from 0.88361 to 0.89280, saving model to effnet.h5 62/62 [==============================] - 117s 2s/step - loss: 0.2354 - accuracy: 0.9050 - val_loss: 0.3963 - val_accuracy: 0.8928 - lr: 0.0010 Epoch 12/20 62/62 [==============================] - ETA: 0s - loss: 0.1504 - accuracy: 0.9428 Epoch 12: val_accuracy did not improve from 0.89280 62/62 [==============================] - 137s 2s/step - loss: 0.1504 - accuracy: 0.9428 - val_loss: 0.4515 - val_accuracy: 0.8882 - lr: 0.0010 Epoch 13/20 62/62 [==============================] - ETA: 0s - loss: 0.1955 - accuracy: 0.9193 Epoch 13: val_accuracy did not improve from 0.89280 Epoch 13: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354. 
62/62 [==============================] - 177s 3s/step - loss: 0.1955 - accuracy: 0.9193 - val_loss: 0.4372 - val_accuracy: 0.8760 - lr: 0.0010 Epoch 14/20 62/62 [==============================] - ETA: 0s - loss: 0.1607 - accuracy: 0.9413 Epoch 14: val_accuracy did not improve from 0.89280 62/62 [==============================] - 170s 3s/step - loss: 0.1607 - accuracy: 0.9413 - val_loss: 0.4373 - val_accuracy: 0.8897 - lr: 3.0000e-04 Epoch 15/20 62/62 [==============================] - ETA: 0s - loss: 0.0912 - accuracy: 0.9683 Epoch 15: val_accuracy did not improve from 0.89280 Epoch 15: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05. 62/62 [==============================] - 169s 3s/step - loss: 0.0912 - accuracy: 0.9683 - val_loss: 0.4826 - val_accuracy: 0.8913 - lr: 3.0000e-04 Epoch 16/20 62/62 [==============================] - ETA: 0s - loss: 0.0706 - accuracy: 0.9765 Epoch 16: val_accuracy improved from 0.89280 to 0.89433, saving model to effnet.h5 62/62 [==============================] - 160s 3s/step - loss: 0.0706 - accuracy: 0.9765 - val_loss: 0.4962 - val_accuracy: 0.8943 - lr: 9.0000e-05 Epoch 17/20 62/62 [==============================] - ETA: 0s - loss: 0.0635 - accuracy: 0.9724 Epoch 17: val_accuracy did not improve from 0.89433 62/62 [==============================] - 149s 2s/step - loss: 0.0635 - accuracy: 0.9724 - val_loss: 0.5121 - val_accuracy: 0.8928 - lr: 9.0000e-05 Epoch 18/20 62/62 [==============================] - ETA: 0s - loss: 0.0571 - accuracy: 0.9796 Epoch 18: val_accuracy did not improve from 0.89433 Epoch 18: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05. 
62/62 [==============================] - 154s 2s/step - loss: 0.0571 - accuracy: 0.9796 - val_loss: 0.5299 - val_accuracy: 0.8943 - lr: 9.0000e-05 Epoch 19/20 62/62 [==============================] - ETA: 0s - loss: 0.0657 - accuracy: 0.9760 Epoch 19: val_accuracy did not improve from 0.89433 62/62 [==============================] - 154s 2s/step - loss: 0.0657 - accuracy: 0.9760 - val_loss: 0.5314 - val_accuracy: 0.8943 - lr: 2.7000e-05 Epoch 20/20 62/62 [==============================] - ETA: 0s - loss: 0.0518 - accuracy: 0.9770 Epoch 20: val_accuracy improved from 0.89433 to 0.89740, saving model to effnet.h5 62/62 [==============================] - 160s 3s/step - loss: 0.0518 - accuracy: 0.9770 - val_loss: 0.5410 - val_accuracy: 0.8974 - lr: 2.7000e-05
Algunas Notas
¶Agregar relleno aquí para preservar la forma de la imagen
Agregar la capa maxpool para obtener los píxeles que tienen la mayor intensidad. Reduce el tamaño de la imagen en 2.
Tomamos filtros de 7×7 como tamaño, ya que las imágenes eran de 300×300, por lo que ayudaría a aprender mejor.
Tomada una "drop out layer", para evitar el overfitting.
La imagen del generador de datos es del tipo 'uint8', debe reescribirla en 'float32'.
# Persist the trained CNN and reload it to verify the file round-trips.
filename = 'CNN_Brain_Tumor_60_20_20.h5'
model.save(filename)
model_final = load_model(filename)
# change directory
#os.chdir(r'../input/brain-tumor-classification-mri')
#print(os.getcwd())
#save the model
#model.save('/kaggle/working/EfficientNetB3.h5')
#model.save_weights('/kaggle/working/EfficientNetB3_weights.h5')
# Plot loss and accuracy per epoch from the recorded training history.
pd.DataFrame(history.history).plot(figsize=(8, 5))
#plt.grid(True)
#plt.gca().set_xlim(0,33)
plt.gca().set_ylim(0, 1)  # metrics live in [0, 1]; clips the huge first-epoch loss
# # plt.savefig('/kaggle/working/EfficientNetB3.png')
# Evaluate on the held-out 20% split (same one-hot encoding as training).
loss, accuracy = model.evaluate(tf.cast(test_img, tf.float32), np.array(pd.get_dummies(test_labels)))
# #print accuracy
print('Accuracy: %f' % (accuracy*100))
21/21 [==============================] - 5s 240ms/step - loss: 0.6808 - accuracy: 0.8836 Accuracy: 88.361406
# Fit a LabelEncoder on the training labels (alphabetical class order, matching
# the get_dummies column order used for training) to map ids back to names.
lb = LabelEncoder()
labels_train = lb.fit(train_labels)  # fit() returns the encoder itself
pred = np.argmax(model.predict(test_img), axis=1)  # predicted class ids
pred_labels = lb.inverse_transform(pred) #predicted labels
val_y_lab = lb.transform(test_labels)  # true class ids for the test split
21/21 [==============================] - 4s 199ms/step
classes = ['glioma_tumor', 'meningioma_tumor', 'no_tumor', 'pituitary_tumor']
# Confusion-matrix heatmap. In sklearn's confusion_matrix the ROWS are the true
# classes and the COLUMNS the predictions, so the heatmap's y axis is "actual"
# and its x axis is "predicted".
# Bug fix: the original had the Actual/Predicted axis labels swapped.
cm = confusion_matrix(val_y_lab, pred)
fig, ax = plt.subplots(figsize=(10.2, 8.1))
a = sns.color_palette("rocket_r") #_r inverts the palette
sns.heatmap(cm, annot=True, fmt='g', linewidths=1, linecolor='white',
            robust=True, annot_kws={"size": 18}, cmap=a)
#annot_kws: settings about annotations
ax.xaxis.set_ticklabels(classes); ax.yaxis.set_ticklabels(classes);
plt.yticks(va="center")
plt.title('Confusion Matrix', fontsize=18, pad=18)
plt.xlabel('Predicted class', labelpad=22, fontsize=14)
plt.ylabel('Actual class', labelpad=22, fontsize=14)
#plt.savefig(os.path.join('plots/','EfficientNetB3_confusion_matrix.png'), dpi=300)
Text(98.2222222222222, 0.5, 'Predicted class')
print(classification_report(val_y_lab,pred,target_names=classes))
precision recall f1-score support
glioma_tumor 0.89 0.85 0.87 199
meningioma_tumor 0.83 0.82 0.83 190
no_tumor 0.79 0.86 0.83 88
pituitary_tumor 0.94 0.97 0.96 176
accuracy 0.87 653
macro avg 0.87 0.87 0.87 653
weighted avg 0.88 0.87 0.87 653
Construcción de Modelo con CLASIFICADOR CONVNET
import IPython
# Restart the Jupyter kernel to free memory before building the next model.
IPython.Application.instance().kernel.do_shutdown(True) #automatically restarts kernel
{'status': 'ok', 'restart': True}
El Kernel se bloqueó al ejecutar código en la celda actual o en una celda anterior. Revise el código de las celdas para identificar una posible causa del error. Haga clic <a href='https://aka.ms/vscodeJupyterKernelCrash'>aquí</a> para obtener más información. Vea el [registro] de Jupyter (command:jupyter.viewOutput) para obtener más detalles.
# Verify that TensorFlow can see a GPU before training; abort otherwise.
import tensorflow as tf

device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print(f'Found GPU at: {device_name}')
else:
    raise SystemError('GPU device not found')
Found GPU at: /device:GPU:0
2023-06-16 04:28:43.897655: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 2023-06-16 04:28:43.897681: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB 2023-06-16 04:28:43.897691: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB 2023-06-16 04:28:43.898307: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support. 2023-06-16 04:28:43.898599: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
Librerias Utilizadas
¶#data manipulation
import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
import os
#image manipulation
from PIL import Image
import cv2
#maths
import numpy as np
from scipy.stats import kstest, norm, mode
import statistics as st
#visualization
import seaborn as sns
import matplotlib.pyplot as plt
#models
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import plot_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import EfficientNetB2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TensorBoard, ModelCheckpoint
from sklearn.metrics import classification_report,confusion_matrix
#logs beautifier and configs
from IPython.display import display, Markdown, Latex, clear_output
from tqdm import tqdm
from warnings import filterwarnings
#import ipywidgets as widgets
import io
# Filesystem layout: dataset root, architecture diagrams, model persistence.
img_folder_path = './work/'  # contains Training/ and Testing/ subfolders
effnet_arc_img_path = './Keras-arch/CNN_EficcientNet_Model_Arc/CNN_EficcientNet_Model_Arc.png'
#The function that uses this parameter needs a folder to save variables, weights, etc
effnet_model_persistence_path = './CNN-models/CNN_EficcientNet_Model'
ann_persistence_path = "./CNN-models/ANN_Model/ann.h5"
ann_arc_img_path = './Keras-arch/ANN_Model_Arc/ANN_Model_Arc.png'
Utilidades
def count_classes(labels, y_train):
    """Count how many samples belong to each class.

    Parameters
    ----------
    labels : sequence of hashable
        Class names, in the order counts should be reported.
    y_train : sequence of hashable
        One label per sample.

    Returns
    -------
    numpy.ndarray of int
        ``counts[i]`` is the number of samples equal to ``labels[i]``.
    """
    # Generalized from the original, which hard-coded exactly four classes
    # (it indexed labels[3] unconditionally, raising IndexError for fewer).
    counts = np.zeros((len(labels),), dtype=int)
    index_of = {label: k for k, label in enumerate(labels)}
    for sample in y_train:
        # Labels not present in `labels` are ignored, as in the original.
        if sample in index_of:
            counts[index_of[sample]] += 1
    return counts
def print_images_from_classes(title, labels, X, y):
    """Show the first image found for each class in `labels`.

    Parameters
    ----------
    title : str
        Caption drawn on the figure.
    labels : sequence
        Class names; one of the figure's 4 axes per class.
    X, y : sequences
        Images and their labels, index-aligned.
    """
    fig, ax = plt.subplots(1, 4, figsize=(20, 20))
    fig.text(s=title, size=18, fontweight='bold',
             fontname='monospace', y=0.62, x=0.4, alpha=0.8)
    for k, label in enumerate(labels):
        # Bug fix: the original scanned with `while True`/j+=1, which walked
        # past the end of `y` (IndexError) whenever a class had no samples.
        for j, sample_label in enumerate(y):
            if sample_label == label:
                ax[k].imshow(X[j])
                ax[k].set_title(sample_label)
                ax[k].axis('off')
                break
#uploader = widgets.FileUpload()
#display(uploader)
def img_pred(upload, image_size):
    """Classify each MRI uploaded through an ipywidgets FileUpload widget.

    Parameters
    ----------
    upload : ipywidgets.FileUpload
        Widget whose ``.value`` maps file names to ``{'content': bytes, ...}``.
    image_size : int
        Side length the image is resized to before prediction.

    Uses the module-level ``model`` for inference and prints the result.
    """
    # Bug fix: the original iterated the global `uploader` instead of the
    # `upload` parameter it was given, making the parameter dead.
    for name, file_info in upload.value.items():
        img = Image.open(io.BytesIO(file_info['content']))
        # PIL yields RGB; OpenCV works in BGR.
        opencvImage = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
        img = cv2.resize(opencvImage, (image_size, image_size))
        img = img.reshape(1, image_size, image_size, 3)
        p = model.predict(img)
        p = np.argmax(p, axis=1)[0]
        # Class ids follow the `labels` list: 0=glioma, 1=no tumor,
        # 2=meningioma, 3=pituitary.
        if p == 0:
            p = 'Glioma Tumor'
        elif p == 1:
            print('No hay Tumor')
        elif p == 2:
            p = 'Meningioma Tumor'
        else:
            p = 'Pituitary Tumor'
        if p != 1:
            print(f'The Model predicts that it is a {p}')
#button = widgets.Button(description='Predict')
#out = widgets.Output()
def on_button_clicked(_):
    """Predict-button callback: classify the uploaded image inside `out`.

    Relies on the module-level widgets ``out`` and ``uploader`` and the
    module-level ``image_size``.
    """
    with out:
        clear_output()
        try:
            # Bug fix: img_pred takes (upload, image_size); the original
            # one-argument call always raised TypeError, which the bare
            # `except` silently reported as an upload problem.
            # NOTE(review): img_pred is redefined later in this file with a
            # different signature — confirm which version is intended here.
            img_pred(uploader, image_size)
        except Exception:
            # Narrowed from a bare `except`, which also swallowed
            # KeyboardInterrupt/SystemExit.
            print('No Image Uploaded/Invalid Image File')
#button.on_click(on_button_clicked)
#widgets.VBox([button,out])
def img_pred(X_test, y_test, rows, model):
    """Plot a rows x rows grid of test images with real vs. predicted class.

    Parameters
    ----------
    X_test : sequence of images
    y_test : one-hot encoded labels, index-aligned with X_test
    rows : int
        Grid side; the first rows**2 test images are shown.
    model : keras model used for prediction

    Raises
    ------
    SystemError
        If rows**2 exceeds the number of test samples.

    Uses the module-level ``image_size`` for display resizing.
    """
    # Bug fix: the original checked len(y_test_encoded), a global that only
    # coincidentally aliases the argument — use the parameter itself.
    if rows**2 > len(y_test):
        raise SystemError('Muestra mas grande que el numero de imagenes de test.')
    # Class ids follow the `labels` list order: glioma, no tumor, meningioma, pituitary.
    class_names = ['Glioma', 'No hay Tumor', 'Meningioma', 'Pituitary']
    pred = np.argmax(model.predict(X_test[:rows**2]), axis=1)
    real = np.argmax(y_test[:rows**2], axis=1)
    fig, ax = plt.subplots(rows, rows, figsize=(20, 20))
    fig.text(s="Predictions", size=24, fontweight='bold',
             fontname='monospace', y=0.62, x=0.4, alpha=0.8)
    for i in range(len(pred)):
        r, c = divmod(i, rows)  # row-major placement in the grid
        predicted = class_names[pred[i]]
        re = class_names[real[i]]
        # Swap channels for display (cv2-loaded images are BGR).
        opencvImage = cv2.cvtColor(X_test[i], cv2.COLOR_RGB2BGR)
        img = cv2.resize(opencvImage, (image_size, image_size))
        ax[r, c].imshow(img)
        ax[r, c].set_title(f"real:{re} predicted:{predicted}")
        ax[r, c].axis('off')
# Class names; a label's index in this list is its integer class id below.
labels = ['glioma_tumor','no_tumor','meningioma_tumor','pituitary_tumor']
#loading images
X_train = []
y_train = []
X_test = []
y_test = []
image_size = 150
# Load the dataset's Training folder into the pooled lists...
for i in labels:
    folderPath = os.path.join(img_folder_path,'Training',i)
    for j in tqdm(os.listdir(folderPath)):
        img = cv2.imread(os.path.join(folderPath,j))
        img = cv2.resize(img,(image_size, image_size))
        X_train.append(img)
        y_train.append(i)
# ...and the Testing folder into the SAME lists: the dataset's own split is
# deliberately discarded and a fresh train/test split is made just below.
for i in labels:
    folderPath = os.path.join(img_folder_path,'Testing',i)
    for j in tqdm(os.listdir(folderPath)):
        img = cv2.imread(os.path.join(folderPath,j))
        img = cv2.resize(img,(image_size,image_size))
        X_train.append(img)
        y_train.append(i)
X_train = np.array(X_train)
y_train = np.array(y_train)
100%|██████████| 826/826 [00:00<00:00, 1133.16it/s] 100%|██████████| 395/395 [00:00<00:00, 1076.27it/s] 100%|██████████| 822/822 [00:00<00:00, 1199.38it/s] 100%|██████████| 827/827 [00:00<00:00, 1020.85it/s] 100%|██████████| 100/100 [00:00<00:00, 1187.97it/s] 100%|██████████| 105/105 [00:00<00:00, 2238.37it/s] 100%|██████████| 115/115 [00:00<00:00, 1496.06it/s] 100%|██████████| 74/74 [00:00<00:00, 530.45it/s]
# Shuffle the pooled images, then hold out 10% as the test split.
X_train, y_train = shuffle(X_train,y_train, random_state=101)
X_train,X_test,y_train,y_test = train_test_split(X_train,y_train, test_size=0.1,random_state=101)
# Report the resulting shapes.
display(Markdown(f"**X_train:** {X_train.shape}\n\n**y_train:** {y_train.shape}\n\n**X_test:** {X_test.shape}\n\n**y_test:** {y_test.shape}\n\n"))
X_train: (2937, 150, 150, 3)
y_train: (2937,)
X_test: (327, 150, 150, 3)
y_test: (327,)
# Class balance of the training split.
# Bug fix: the original had both statements jammed onto one line (invalid
# syntax), so `counts` was never defined and the barplot raised NameError.
counts = count_classes(labels, y_train)
print(counts, counts.sum())
sns.barplot(x=labels, y=counts / len(y_train), palette="ch:.25")
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[10], line 1 ----> 1 sns.barplot(x=labels,y=counts/len(y_train), palette="ch:.25") NameError: name 'counts' is not defined
# Show one sample image per class for each split.
print_images_from_classes("-Train- Primera Imagen de cada clase",labels,X=X_train,y=y_train)
print_images_from_classes("-Test- First Image Of Every Class",labels,X=X_test,y=y_test)
# One-hot encode the target: the class id is the label's index in `labels`.
y_train_encoded = [labels.index(i) for i in y_train]
y_train = y_train_encoded
y_train = tf.keras.utils.to_categorical(y_train)
y_test_encoded = [labels.index(i) for i in y_test]
y_test = y_test_encoded
y_test = tf.keras.utils.to_categorical(y_test)
Transfer Learning
¶Transfer Learning se enfoca en aplicar el conocimiento adquirido al resolver una tarea a una tarea relacionada.
En este caso EfficientNet es considerada la arquitectura de CNN más poderosa desarrollada para datasets considerados como puntos de referencia en visión artificial.
Estos son los resultados de EfficientNet en U-Net, el método más usado para segmentación de imágenes.
Y este el desempeño de cada modelo de efficientNet
En este caso considerando un tradeoff entre complejidad del modelo y desempeño elegimos usar EfficientNetB2, la siguiente es una imagen de la arquitectura explicada en un alto nivel.
# Transfer learning: EfficientNetB2 pretrained on ImageNet as the backbone,
# with its top classification layers removed.
effnet = EfficientNetB2(weights='imagenet', include_top=False,
                        input_shape=(image_size, image_size, 3))

# Callbacks: early stopping on val_loss, TensorBoard logs, LR decay on plateau.
earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3,
                                                 verbose=1, restore_best_weights=True)
tensorboard = TensorBoard(log_dir='logs')
#checkpoint = ModelCheckpoint("effnet.h5",monitor="val_accuracy",save_best_only=True,mode="auto",verbose=1)
"""
Cant use checkpoint because versions of tensor_flow higher than 2.9.1 have problems auto serializing efficientNet
TypeError: Unable to serialize [2.0896919 2.1128857 2.1081853] to JSON. Unrecognized type <class 'tensorflow.python.framework.ops.EagerTensor'>.
"""
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.3, patience=2,
                              min_delta=0.001, mode='auto', verbose=1)

# Classification head: global average pooling -> dropout -> 4-way softmax.
head = tf.keras.layers.GlobalAveragePooling2D()(effnet.output)
head = tf.keras.layers.Dropout(rate=0.5)(head)
head = tf.keras.layers.Dense(4, activation='softmax')(head)
model = tf.keras.models.Model(inputs=effnet.input, outputs=head)

# Compile for one-hot labels.
model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

# Save an architecture diagram (requires pydot + graphviz).
plot_model(model, to_file=effnet_arc_img_path, show_shapes=True, show_layer_names=True)
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.
# TRAINING: up to 50 epochs with a 15% validation split; early stopping
# restores the best weights when val_loss stalls for 3 epochs.
history = model.fit(X_train,y_train,validation_split=0.15, epochs =50, verbose=1, batch_size=32,
                    callbacks=[earlystopping,tensorboard,reduce_lr])
Epoch 1/50
2023-06-16 04:31:12.370482: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
78/78 [==============================] - ETA: 0s - loss: 0.5750 - accuracy: 0.7829
2023-06-16 04:32:36.695489: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
78/78 [==============================] - 96s 870ms/step - loss: 0.5750 - accuracy: 0.7829 - val_loss: 1.0697 - val_accuracy: 0.6689 - lr: 0.0010 Epoch 2/50 78/78 [==============================] - 64s 813ms/step - loss: 0.2273 - accuracy: 0.9151 - val_loss: 0.3855 - val_accuracy: 0.8662 - lr: 0.0010 Epoch 3/50 78/78 [==============================] - 67s 856ms/step - loss: 0.1142 - accuracy: 0.9595 - val_loss: 0.3519 - val_accuracy: 0.9116 - lr: 0.0010 Epoch 4/50 78/78 [==============================] - 77s 992ms/step - loss: 0.1394 - accuracy: 0.9523 - val_loss: 0.7583 - val_accuracy: 0.8005 - lr: 0.0010 Epoch 5/50 78/78 [==============================] - ETA: 0s - loss: 0.1226 - accuracy: 0.9531 Epoch 5: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354. 78/78 [==============================] - 69s 884ms/step - loss: 0.1226 - accuracy: 0.9531 - val_loss: 0.4839 - val_accuracy: 0.8753 - lr: 0.0010 Epoch 6/50 78/78 [==============================] - 71s 910ms/step - loss: 0.0512 - accuracy: 0.9816 - val_loss: 0.1406 - val_accuracy: 0.9683 - lr: 3.0000e-04 Epoch 7/50 78/78 [==============================] - 66s 839ms/step - loss: 0.0404 - accuracy: 0.9864 - val_loss: 0.1465 - val_accuracy: 0.9705 - lr: 3.0000e-04 Epoch 8/50 78/78 [==============================] - 65s 837ms/step - loss: 0.0319 - accuracy: 0.9900 - val_loss: 0.1252 - val_accuracy: 0.9751 - lr: 3.0000e-04 Epoch 9/50 78/78 [==============================] - 71s 905ms/step - loss: 0.0293 - accuracy: 0.9888 - val_loss: 0.1330 - val_accuracy: 0.9773 - lr: 3.0000e-04 Epoch 10/50 78/78 [==============================] - 63s 801ms/step - loss: 0.0294 - accuracy: 0.9888 - val_loss: 0.1212 - val_accuracy: 0.9796 - lr: 3.0000e-04 Epoch 11/50 78/78 [==============================] - 63s 803ms/step - loss: 0.0208 - accuracy: 0.9928 - val_loss: 0.1313 - val_accuracy: 0.9773 - lr: 3.0000e-04 Epoch 12/50 78/78 [==============================] - ETA: 0s - loss: 0.0195 - accuracy: 0.9936 Epoch 12: 
ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05. 78/78 [==============================] - 67s 860ms/step - loss: 0.0195 - accuracy: 0.9936 - val_loss: 0.1222 - val_accuracy: 0.9728 - lr: 3.0000e-04 Epoch 13/50 78/78 [==============================] - ETA: 0s - loss: 0.0180 - accuracy: 0.9940Restoring model weights from the end of the best epoch: 10. 78/78 [==============================] - 63s 804ms/step - loss: 0.0180 - accuracy: 0.9940 - val_loss: 0.1243 - val_accuracy: 0.9773 - lr: 9.0000e-05 Epoch 13: early stopping
# Visualize model performance: accuracy and loss per epoch, train vs. validation.
filterwarnings('ignore')
train_acc = history.history['accuracy']
train_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']
# Bug fix: derive the x axis from the recorded history instead of hard-coding
# 13 epochs — early stopping makes the actual epoch count unpredictable, and a
# mismatch raises ValueError in Axes.plot.
epochs = list(range(len(train_acc)))
fig, ax = plt.subplots(1, 2, figsize=(14, 7))
fig.text(s='Epochs vs. Training and Validation Accuracy/Loss', size=18, fontweight='bold',
         fontname='monospace', color="#5B505E", y=1, x=0.28, alpha=0.8)
sns.despine()
ax[0].plot(epochs, train_acc, marker='o', markerfacecolor="#B4ABEB", color="#B3BCF5",
           label='Training Accuracy')
ax[0].plot(epochs, val_acc, marker='o', markerfacecolor="#A89659", color="#F5E5B3",
           label='Validation Accuracy')
ax[0].legend(frameon=False)
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Accuracy')
sns.despine()
ax[1].plot(epochs, train_loss, marker='o', markerfacecolor="#B4ABEB", color="#B3BCF5",
           label='Training Loss')
ax[1].plot(epochs, val_loss, marker='o', markerfacecolor="#A89659", color="#F5E5B3",
           label='Validation Loss')
ax[1].legend(frameon=False)
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Training & Validation Loss')
fig.show()
#PREDICTION: argmax over the softmax outputs gives the predicted class id.
pred = model.predict(X_test)
pred = np.argmax(pred,axis=1)
y_test_encode = np.argmax(y_test,axis=1)  # decode one-hot back to class ids
print(classification_report(y_test_encode,pred))
# Confusion-matrix heatmap (rows = true class, columns = predicted class).
fig,ax=plt.subplots(1,1,figsize=(14,7))
sns.heatmap(confusion_matrix(y_test_encode,pred),ax=ax,xticklabels=labels,yticklabels=labels,annot=True,
            cmap=["#807C5E", "#BFB98E", '#E6DFAA', '#F0EBB6', '#FFF7BD'],alpha=0.7,linewidths=2,linecolor="#5B505E")
fig.text(s='Heatmap of the Confusion Matrix',size=18,fontweight='bold',
         fontname='monospace',color="#5B505E",y=0.92,x=0.28,alpha=0.8)
plt.show()
2023-06-16 04:50:45.052932: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
11/11 [==============================] - 11s 729ms/step
precision recall f1-score support
0 0.95 0.89 0.92 93
1 0.98 1.00 0.99 51
2 0.90 0.96 0.93 96
3 1.00 0.99 0.99 87
accuracy 0.95 327
macro avg 0.96 0.96 0.96 327
weighted avg 0.96 0.95 0.95 327
img_pred(X_test,y_test,4,model)
1/1 [==============================] - 5s 5s/step
Construcción de modelo CNN con Keras
#Callbacks (earlystopping, tensorboard, checkpoint, reduce_lr)
earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3,verbose=1,restore_best_weights=True)
tensorboard = TensorBoard(log_dir = 'logs')
checkpoint = ModelCheckpoint(ann_persistence_path,monitor="val_accuracy",save_best_only=True,mode="auto",verbose=1)
reduce_lr = ReduceLROnPlateau(monitor = 'val_accuracy', factor = 0.3, patience = 2, min_delta = 0.001,
                              mode='auto',verbose=1)
# MODEL: plain fully-connected network (no convolutions) as a baseline.
# NOTE(review): raw pixel values are not scaled to [0,1] — the enormous losses
# in the training log suggest normalization would help; confirm before reuse.
ann = Sequential([
    tf.keras.layers.Flatten(input_shape=(image_size,image_size,3)),
    tf.keras.layers.Dense(3000, activation='relu'),
    tf.keras.layers.Dense(1000, activation='relu'),
    tf.keras.layers.Dense(4, activation='softmax')
])
#Create the model
ann.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#Ploting Architecture
plot_model(ann, to_file=ann_arc_img_path, show_shapes=True, show_layer_names=True)
# TRAINING: up to 50 epochs, 15% validation split, early stopping + checkpoint.
history_ann = ann.fit(X_train,y_train,validation_split=0.15, epochs =50, verbose=1, batch_size=32,
                      callbacks=[earlystopping,tensorboard,reduce_lr,checkpoint])
Epoch 1/50
2023-06-16 04:51:09.933087: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
78/78 [==============================] - ETA: 0s - loss: 18725.7617 - accuracy: 0.3646
2023-06-16 04:51:35.307175: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
Epoch 1: val_accuracy improved from -inf to 0.36508, saving model to ./CNN-models/ANN_Model/ann.h5 78/78 [==============================] - 34s 402ms/step - loss: 18725.7617 - accuracy: 0.3646 - val_loss: 5674.6060 - val_accuracy: 0.3651 - lr: 0.0010 Epoch 2/50 78/78 [==============================] - ETA: 0s - loss: 9867.1094 - accuracy: 0.4083 Epoch 2: val_accuracy improved from 0.36508 to 0.50340, saving model to ./CNN-models/ANN_Model/ann.h5 78/78 [==============================] - 28s 350ms/step - loss: 9867.1094 - accuracy: 0.4083 - val_loss: 9190.0732 - val_accuracy: 0.5034 - lr: 0.0010 Epoch 3/50 78/78 [==============================] - ETA: 0s - loss: 6313.2153 - accuracy: 0.4475 Epoch 3: val_accuracy did not improve from 0.50340 78/78 [==============================] - 23s 294ms/step - loss: 6313.2153 - accuracy: 0.4475 - val_loss: 5703.4868 - val_accuracy: 0.4150 - lr: 0.0010 Epoch 4/50 78/78 [==============================] - ETA: 0s - loss: 4808.7886 - accuracy: 0.4796Restoring model weights from the end of the best epoch: 1. Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354. Epoch 4: val_accuracy did not improve from 0.50340 78/78 [==============================] - 26s 331ms/step - loss: 4808.7886 - accuracy: 0.4796 - val_loss: 10033.1396 - val_accuracy: 0.4694 - lr: 0.0010 Epoch 4: early stopping
#Visualize performance of the model
filterwarnings('ignore')

train_acc = history_ann.history['accuracy']
train_loss = history_ann.history['loss']
val_acc = history_ann.history['val_accuracy']
val_loss = history_ann.history['val_loss']

# BUG FIX: the x-axis was hard-coded as list(range(17)), but EarlyStopping can
# halt training after any number of epochs (here it stopped after 4 of 50),
# which raised "ValueError: x and y must have same first dimension, but have
# shapes (17,) and (4,)". Derive the axis from the recorded history so it
# always matches the curves being plotted.
epochs = list(range(len(train_acc)))

fig, ax = plt.subplots(1,2,figsize=(14,7))
fig.text(s='Epochs vs. Training and Validation Accuracy/Loss',size=18,fontweight='bold',
             fontname='monospace',color="#5B505E",y=1,x=0.28,alpha=0.8)
sns.despine()
# Left panel: accuracy curves.
ax[0].plot(epochs, train_acc, marker='o',markerfacecolor="#B4ABEB",color="#B3BCF5",
           label = 'Training Accuracy')
ax[0].plot(epochs, val_acc, marker='o',markerfacecolor="#A89659",color="#F5E5B3",
           label = 'Validation Accuracy')
ax[0].legend(frameon=False)
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Accuracy')
sns.despine()
# Right panel: loss curves.
ax[1].plot(epochs, train_loss, marker='o',markerfacecolor="#B4ABEB",color="#B3BCF5",
           label ='Training Loss')
ax[1].plot(epochs, val_loss, marker='o',markerfacecolor="#A89659",color="#F5E5B3",
           label = 'Validation Loss')
ax[1].legend(frameon=False)
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Training & Validation Loss')
fig.show()
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[28], line 15 11 fig.text(s='Epochs vs. Training and Validation Accuracy/Loss',size=18,fontweight='bold', 12 fontname='monospace',color="#5B505E",y=1,x=0.28,alpha=0.8) 14 sns.despine() ---> 15 ax[0].plot(epochs, train_acc, marker='o',markerfacecolor="#B4ABEB",color="#B3BCF5", 16 label = 'Training Accuracy') 17 ax[0].plot(epochs, val_acc, marker='o',markerfacecolor="#A89659",color="#F5E5B3", 18 label = 'Validation Accuracy') 19 ax[0].legend(frameon=False) File ~/miniconda3/envs/osx/lib/python3.11/site-packages/matplotlib/axes/_axes.py:1688, in Axes.plot(self, scalex, scaley, data, *args, **kwargs) 1445 """ 1446 Plot y versus x as lines and/or markers. 1447 (...) 1685 (``'green'``) or hex strings (``'#008000'``). 1686 """ 1687 kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D) -> 1688 lines = [*self._get_lines(*args, data=data, **kwargs)] 1689 for line in lines: 1690 self.add_line(line) File ~/miniconda3/envs/osx/lib/python3.11/site-packages/matplotlib/axes/_base.py:311, in _process_plot_var_args.__call__(self, data, *args, **kwargs) 309 this += args[0], 310 args = args[1:] --> 311 yield from self._plot_args( 312 this, kwargs, ambiguous_fmt_datakey=ambiguous_fmt_datakey) File ~/miniconda3/envs/osx/lib/python3.11/site-packages/matplotlib/axes/_base.py:504, in _process_plot_var_args._plot_args(self, tup, kwargs, return_kwargs, ambiguous_fmt_datakey) 501 self.axes.yaxis.update_units(y) 503 if x.shape[0] != y.shape[0]: --> 504 raise ValueError(f"x and y must have same first dimension, but " 505 f"have shapes {x.shape} and {y.shape}") 506 if x.ndim > 2 or y.ndim > 2: 507 raise ValueError(f"x and y can be no greater than 2D, but have " 508 f"shapes {x.shape} and {y.shape}") ValueError: x and y must have same first dimension, but have shapes (17,) and (4,)
#PREDICTION
# Score the trained ANN on the held-out test set: print per-class precision/
# recall/F1, then draw the confusion matrix as an annotated heatmap.
probabilities = ann.predict(X_test)
pred = np.argmax(probabilities, axis=1)
y_test_encode = np.argmax(y_test, axis=1)
print(classification_report(y_test_encode, pred))

heat_palette = ["#807C5E", "#BFB98E", '#E6DFAA', '#F0EBB6', '#FFF7BD']
fig, ax = plt.subplots(1, 1, figsize=(14, 7))
sns.heatmap(
    confusion_matrix(y_test_encode, pred),
    ax=ax,
    xticklabels=labels,
    yticklabels=labels,
    annot=True,
    cmap=heat_palette,
    alpha=0.7,
    linewidths=2,
    linecolor="#5B505E",
)
fig.text(s='Heatmap of the Confusion Matrix',size=18,fontweight='bold',
             fontname='monospace',color="#5B505E",y=0.92,x=0.28,alpha=0.8)
plt.show()
1/11 [=>............................] - ETA: 1s
2023-06-16 04:53:20.287468: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
11/11 [==============================] - 1s 116ms/step
precision recall f1-score support
0 0.47 0.34 0.40 93
1 0.20 0.67 0.31 51
2 0.48 0.12 0.20 96
3 0.74 0.56 0.64 87
accuracy 0.39 327
macro avg 0.47 0.42 0.39 327
weighted avg 0.50 0.39 0.39 327
# Show 4 sample test images with the ANN's predictions — img_pred is a helper
# presumably defined in an earlier notebook cell (not visible here; verify).
img_pred(X_test,y_test,4,ann)
1/1 [==============================] - 0s 61ms/step